* Project root; every input and output path in this script is built from it
global g_root "C:\Users\Hongye Guo\Dropbox (Personal)\Density\Code\Production\Submission"

* Load the stacked quarterly forecast file
use "$g_root\Data\stacked_qtr_202106.dta", clear

* Basic processing
{
	* Date conversion: turn the string fpedats into a Stata daily date and
	* keep the original variable name.
	* NOTE(review): the third argument (1990) only matters for two-digit
	* years -- confirm the source strings carry four-digit years.
	tempvar fpe_daily
	gen `fpe_daily' = date(fpedats, "DMY", 1990)
	drop fpedats
	rename `fpe_daily' fpedats
	duplicates drop

	* Actuals: keep unmatched master rows and matches, drop using-only rows
	* (presumably this brings in actual_usd and anndats_act -- verify
	* against the using file)
	merge m:1 permno fpedats using "$g_root\Data\IBES_actuals.dta", keep(master match) nogenerate
	rename forecast_month ym

	* Generate stock-fp-month level of revisions and surprises.
	* Both are measured against the previous row of the same
	* permno-fpedats group once rows are ordered by ym; the first row of
	* each group is therefore missing.
	sort permno fpedats ym
	by permno fpedats (ym): gen med_value_usd_chg = med_value_usd - med_value_usd[_n-1]
	by permno fpedats (ym): gen sup_usd = actual_usd - med_value_usd[_n-1]

	* Price / market value data; rows lacking either variable are dropped
	merge m:1 permno ym using "$g_root/Data\crsp_prc_shrout.dta", keep(master match) nogenerate
	drop if missing(mkval_l1m, prc_adj_l1m)

	* Calendar quarter containing the forecast month
	gen yq = qofd(dofm(ym))
}

* Scale the surprises by adjusted stock prices, as is standard in the literature
{
	gen sup_final = sup_usd/prc_adj_l1m
	
	* Alternative (disabled below): quarter-by-quarter winsorization.
	* That logic is better in principle because it does not incur look-ahead bias,
	* and using it strengthens the results in Table 7.
	* The full-sample version further down is the one in use, to stay
	* consistent with the initial submission.
	/*
	sort yq
	by yq: egen p_up = pctile(sup_final), p(99)
	by yq: egen p_down = pctile(sup_final), p(1)
	gen up_win_flag = !mi(sup_final) & sup_final>p_up
	gen low_win_flag = !mi(sup_final) & sup_final<p_down
	replace sup_final = p_up if !mi(sup_final) & sup_final>p_up
	replace sup_final = p_down if !mi(sup_final) & sup_final<p_down
	replace sup_final = . if ym != mofd(anndats_act)
	drop p_up p_down
	*/
	
	* Full-sample winsorization at the 1st and 99th percentiles.
	* The cutoffs are copied into temporary scalars right after -summarize-
	* so that (a) they cannot be wiped by an r-class command inserted later
	* between the lines below, and (b) they are compared at full double
	* precision instead of being round-tripped through macro text.
	tempname cut_lo cut_hi
	qui sum sup_final, d
	scalar `cut_lo' = r(p1)
	scalar `cut_hi' = r(p99)

	* Flags record which observations sit outside the cutoffs before clipping
	gen up_win_flag = !mi(sup_final) & sup_final > scalar(`cut_hi')
	gen low_win_flag = !mi(sup_final) & sup_final < scalar(`cut_lo')
	replace sup_final = scalar(`cut_hi') if !mi(sup_final) & sup_final > scalar(`cut_hi')
	replace sup_final = scalar(`cut_lo') if !mi(sup_final) & sup_final < scalar(`cut_lo')

	* Keep the surprise only in the month of the actual announcement
	replace sup_final = . if ym != mofd(anndats_act)

}

* Decompose the term structure of the revisions and surprises
{
	* Month and quarter of the actual announcement, and how far ahead of the
	* forecast month / quarter each one lies
	gen ym_anndats = mofd(anndats_act)
	gen month_dis = ym_anndats - ym
	gen yq_anndats = qofd(anndats_act)
	gen quarter_dis = yq_anndats - yq

	* Retain forecasts made 0 to 13 months before the announcement month;
	* rows with a missing distance are removed as well
	keep if inrange(month_dis, 0, 13)
}

* Aggregate to the month-month_dist level
{
	* Market-value-weighted mean of sup_final within each ym x month_dis cell:
	*   sum(mkval_l1m * sup_final) / sum(mkval_l1m over rows with a non-missing product)
	tempvar w_sup sum_w_sup sum_w cell_ratio

	sort ym month_dis
	gen `w_sup' = mkval_l1m * sup_final

	* Numerator: total weighted surprise in the cell
	by ym month_dis: egen `sum_w_sup' = total(`w_sup') if !mi(mkval_l1m)
	* Denominator: total weight, counted (and filled in) only on rows whose
	* weighted surprise is non-missing
	by ym month_dis: egen `sum_w' = total(mkval_l1m) if !mi(`w_sup')

	* The ratio exists only on rows where the denominator was filled in;
	* max() spreads it to every row of the cell
	gen `cell_ratio' = `sum_w_sup'/`sum_w'
	by ym month_dis: egen ym_md_sup_final = max(`cell_ratio')

	drop `w_sup' `sum_w_sup' `sum_w' `cell_ratio'
	
}

* Go to ym-month_dist level
{
	* Keep the first row of every ym x month_dis cell (relies on the data
	* still being sorted by ym month_dis), retaining only the identifiers
	* and the ym_md_* aggregates
	by ym month_dis: keep if _n == 1
	keep ym month_dis ym_md_*

	* One row per ym, with one column per month_dis value
	reshape wide ym_md_*, i(ym) j(month_dis)
}

* Lead lag
{
	tsset ym
	sort ym

	* Leads 1 to 12 of the announcement-month aggregate surprise
	forvalues h = 1/12 {
		gen ym_md_sup_final0_f`h'm = F`h'.ym_md_sup_final0
	}

	* Generate Relevant variables
	* month_dv is the calendar month of ym + 1; am_flag_dv marks the rows
	* where that month is 1, 4, 7 or 10
	gen month_dv = month(dofm(ym + 1))
	gen am_flag_dv = mod(month_dv, 3)==1

	* Lags 1 to 12 of the same series
	forvalues k = 1/12 {
		gen ym_md_sup_final0_l`k'm = L`k'.ym_md_sup_final0
	}

	* The beginning of the surprise time series is unreasonably volatile
	* Drop them, though it does not matter when the sample starts
	* As long as the very extreme observations at the beginning of the sample are removed
	twoway line ym_md_sup_final0 ym
	drop if ym<ym(1986, 6)

	* Trailing 12-month total: current month plus lags 1 to 11
	* (rowtotal counts a missing component as zero)
	local trail12 ym_md_sup_final0
	forvalues k = 1/11 {
		local trail12 `trail12' ym_md_sup_final0_l`k'm
	}
	egen ym_md_sup_final0_t0_11m = rowtotal(`trail12')
	gen ym_md_sup_final0_t0_11mxam = ym_md_sup_final0_t0_11m * am_flag_dv

	* For q = 1..4 pick one monthly lag per quarter back. With m0 = 3*(q-1):
	*   mod(month_dv,3)==1 -> lag m0+2
	*   mod(month_dv,3)==0 -> lag m0+1
	*   otherwise          -> lag m0
	gen ym_md_sup_final0_l0m = ym_md_sup_final0
	local base ym_md_sup_final0
	forvalues q = 1/4 {
		local m0 = 3*(`q' - 1)
		local m1 = `m0' + 1
		local m2 = `m0' + 2
		gen `base'_prevq1_l`q'q = cond(mod(month_dv,3)==1, `base'_l`m2'm, cond(mod(month_dv,3)==0, `base'_l`m1'm, `base'_l`m0'm))
	}

	* Sum over the four quarterly picks, and its interaction with am_flag_dv
	gen `base'_prevq1_t4q = `base'_prevq1_l1q + `base'_prevq1_l2q + `base'_prevq1_l3q + `base'_prevq1_l4q
	gen `base'_prevq1_t4q_xam = `base'_prevq1_t4q * am_flag_dv
}

* Table 7
* Five Newey-West regressions of the one-month-ahead aggregate surprise,
* stored as sep_1 ... sep_5 and exported together with esttab.
* NOTE(review): esttab sep_* exports every stored estimate whose name starts
* with sep_ -- confirm none are left over from an earlier run in the session.
clear matrix
local col_count = 0
tsset ym

* Column 1: full sample, trailing 12-month surprise
newey ym_md_sup_final0_f1m ym_md_sup_final0_t0_11m, lag(24) force
local ++col_count
estimates store sep_`col_count'

* Columns 2 and 3: the same regression on the am_flag_dv months (8 lags) and
* on the remaining months (16 lags). Each subsample is re-indexed with a
* consecutive counter so the lag structure is defined over the retained rows.
foreach spec in "am_flag_dv 8" "!am_flag_dv 16" {
	local sub_cond : word 1 of `spec'
	local n_lag : word 2 of `spec'

	preserve
	keep if `sub_cond'
	sort ym
	gen lserial = _n
	tsset lserial
	newey ym_md_sup_final0_f1m ym_md_sup_final0_t0_11m if `sub_cond', lag(`n_lag') force
	local ++col_count
	estimates store sep_`col_count'
	restore
}

* Column 4: full sample with the am_flag_dv interaction
newey ym_md_sup_final0_f1m ym_md_sup_final0_t0_11m ym_md_sup_final0_t0_11mxam am_flag_dv, lag(24) force
local ++col_count
estimates store sep_`col_count'

* Column 5: the four-quarter regressor and its am_flag_dv interaction
newey ym_md_sup_final0_f1m ym_md_sup_final0_prevq1_t4q ym_md_sup_final0_prevq1_t4q_xam am_flag_dv, lag(24) force
local ++col_count
estimates store sep_`col_count'

* Output file name carries a YYYYMMDD date and HHMM time stamp
local l_date : display %tdCYND date(c(current_date), "DMY")
local l_time = substr(c(current_time), 1, 2) + substr(c(current_time), 4, 2)
esttab sep_* using "$g_root/Output/AggSup_`l_date'`l_time'.csv", star(* 0.1  ** 0.05 *** 0.01) b(3) t(2) r2 br noomit replace 
